package org.yi.web.novel.rule;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.script.ScriptException;

import org.apache.commons.lang.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yi.core.annotation.Action;
import org.yi.core.helper.YiXMLWriter;
import org.yi.core.utils.MathUtils;
import org.yi.spider.helper.ParseHelper;
import org.yi.spider.helper.RuleHelper;
import org.yi.spider.helper.SpiderHelper;
import org.yi.spider.model.CollectParam;
import org.yi.spider.model.Rule;
import org.yi.spider.model.Site;
import org.yi.web.account.entity.AccountEntity;
import org.yi.web.base.BaseController;

import com.jfinal.kit.PathKit;

/**
 * 小说采集规则管理类
 * @author qq
 *
 */
@Action(action = "/rule/novel")
public class NovelRuleController extends BaseController {
	
	private static final Logger logger = LoggerFactory.getLogger(NovelRuleController.class);
	
	/**
	 * Builds the directory path under which rule files are kept.
	 * <p>
	 * The base directory is {@code <web root>/_rules/}. When {@code getFrontUser()} yields an
	 * account, a sub-directory named after that account's {@code login_name} is appended.
	 *
	 * @return the rule directory path, always terminated by {@code /}
	 */
	public String getRulePath() {
		final String baseDir = PathKit.getWebRootPath() + "/_rules/";
		final AccountEntity currentUser = getFrontUser();
		if (currentUser == null) {
			return baseDir;
		}
		return baseDir + currentUser.getStr("login_name") + "/";
	}
	
	/**
	 * Default action of this controller; simply delegates to {@link #list()}.
	 */
	public void index() {
		this.list();
	}
	
	/**
	 * Shows the overview page with all rule files.
	 * <p>
	 * The names of all {@code .xml} files found in the rule directory are exposed to the view
	 * as the {@code files} attribute, then {@code list.html} is rendered. Per the
	 * {@link File#list(FilenameFilter)} contract the attribute is {@code null} when the
	 * directory does not exist.
	 */
	public void list() {
		// Only rule definitions (*.xml) are listed.
		final FilenameFilter xmlOnly = new FilenameFilter() {
			@Override
			public boolean accept(File dir, String name) {
				return name.endsWith(".xml");
			}
		};
		final File ruleDir = new File(getRulePath());
		final String[] ruleFiles = ruleDir.list(xmlOnly);
		setAttr("files", ruleFiles);
		render("list.html");
	}

	/**
	 * Resolves a user supplied rule file name against the rule directory.
	 * <p>
	 * The name comes straight from a request parameter, so it must be a plain file name:
	 * blank names, {@code .} / {@code ..}, and names containing a path separator (either
	 * style, regardless of platform) or a NUL character are rejected. This keeps callers
	 * from reading or deleting files outside {@link #getRulePath()}.
	 *
	 * @param fileName the requested file name, may be {@code null}
	 * @return the file inside the rule directory, or {@code null} when the name is not acceptable
	 */
	private File resolveRuleFile(String fileName) {
		if (StringUtils.isBlank(fileName)) {
			return null;
		}
		String trimmed = fileName.trim();
		if (".".equals(trimmed) || "..".equals(trimmed)) {
			return null;
		}
		if (fileName.indexOf('/') >= 0 || fileName.indexOf('\\') >= 0 || fileName.indexOf('\0') >= 0) {
			return null;
		}
		return new File(getRulePath() + fileName);
	}

	/**
	 * Escapes a rule expression so it can be placed inside an HTML form field:
	 * double quotes become {@code &quot;} and {@code &nbsp;} becomes {@code &amp;nbsp;}.
	 *
	 * @param value the raw expression, must not be {@code null}
	 * @return the escaped expression
	 */
	private static String escapeForForm(String value) {
		return value.replace("\"", "&quot;").replace("&nbsp;", "&amp;nbsp;");
	}

	/**
	 * Downloads the rule file named by the {@code name} request parameter.
	 * <p>
	 * Names that do not denote a plain file inside the rule directory are refused and the
	 * request is forwarded to the list page instead.
	 */
	public void down() {
		String fileName = getPara("name");
		File file = resolveRuleFile(fileName);
		if (file == null) {
			forwardAction("/rule/novel/list");
			return;
		}
		renderFile(file);
	}
	
	/**
	 * Opens the rule file named by the {@code name} request parameter for editing.
	 * <p>
	 * Without a name the request is forwarded to the list page; otherwise the rules are
	 * loaded into request attributes and {@code create.html} is rendered.
	 */
	public void edit() {
		String fileName = getPara("name");
		if (StringUtils.isBlank(fileName)) {
			forwardAction("/rule/novel/list");
			return;
		}
		parseRuleFile(fileName);
		render("create.html");
	}
	
	/**
	 * Deletes the rule file named by the {@code name} request parameter and returns to the
	 * list page. Unacceptable names and non-existing files are silently ignored; a failed
	 * deletion is reported through {@code addError}.
	 */
	public void delete() {
		String fileName = getPara("name");
		File target = resolveRuleFile(fileName);
		if (target != null && target.isFile() && target.canRead()) {
			if (!target.delete()) {
				addError("删除文件[%s]失败", fileName);
			}
		}
		forwardAction("/rule/novel/list");
	}
	
	/**
	 * Parses a rule file and hands its rules to the view.
	 * <p>
	 * Every rule is stored as a request attribute under its key, with pattern and filter
	 * pattern escaped for use in form fields; the file name itself is stored as
	 * {@code fileName}. Nothing happens when the name is unacceptable or the file does not
	 * exist. Parse failures are logged and otherwise ignored.
	 *
	 * @param fileName name of the rule file inside the rule directory
	 */
	private void parseRuleFile(String fileName) {
		File file = resolveRuleFile(fileName);
		if (file == null || !file.exists()) {
			return;
		}
		try {
			Map<String, Rule> ruleMap = RuleHelper.parseXml(file);
			for (Entry<String, Rule> entry : ruleMap.entrySet()) {
				Rule rule = entry.getValue();
				// Both expressions are optional; guard each one the same way.
				if (rule.getPattern() != null) {
					rule.setPattern(escapeForForm(rule.getPattern()));
				}
				if (rule.getFilterPattern() != null) {
					rule.setFilterPattern(escapeForForm(rule.getFilterPattern()));
				}
				getRequest().setAttribute(entry.getKey(), rule);
			}
			getRequest().setAttribute("fileName", fileName);
		} catch (DocumentException e) {
			logger.error(e.getMessage(), e);
		}
	}

	/**
	 * Shows the empty rule editor ({@code create.html}).
	 */
	public void create() {
		this.render("create.html");
	}
	
	/**
	 * Handles the POST of the rule editor: persists the submitted rules as a rule file and
	 * answers with a JSON string.
	 * <p>
	 * The answer is {@code "success"} when a file was written, {@code "fail"} when an
	 * {@link IOException} occurred (which is also logged), and an empty string when no file
	 * was returned.
	 */
	public void doCreate() {
		String outcome;
		try {
			final File written = createRuleFile();
			outcome = (written != null) ? "success" : "";
		} catch (IOException e) {
			logger.error(e.getMessage(), e);
			outcome = "fail";
		}
		renderJson(outcome);
	}

	/**
	 * Writes the rules submitted with the current request to an XML rule file.
	 * <p>
	 * The document has a {@code RuleConfigInfo} root with one child per rule (see
	 * {@link #createDetailRule(Element, String, Rule)}). The target name is taken from the
	 * {@code ruleFileName} request parameter; when it is blank a timestamp
	 * ({@code yyyyMMdd_HHmmss}) is used. A {@code .xml} suffix is appended if missing and
	 * the rule directory is created on demand.
	 *
	 * @return the file that was written
	 * @throws IOException if the file name contains a path separator, the rule directory
	 *                     cannot be created, or writing the document fails
	 */
	private File createRuleFile() throws IOException {
		
		Document document = DocumentHelper.createDocument();
		Element root = document.addElement("RuleConfigInfo");
		root.addNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
		root.addNamespace("xsd", "http://www.w3.org/2001/XMLSchema");
		
		for (Entry<String, Rule> entry : createRuleMap().entrySet()) {
			createDetailRule(root, entry.getKey(), entry.getValue());
		}
		
		String fileName = getPara("ruleFileName");
		if (StringUtils.isBlank(fileName)) {
			// "HH" already is the 24-hour field; the former "HH24" put a literal "24" into the name.
			fileName = new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime());
		}
		if (!fileName.endsWith(".xml")) {
			fileName += ".xml";
		}
		// The name is request input: it must stay a plain file name inside the rule directory.
		if (fileName.indexOf('/') >= 0 || fileName.indexOf('\\') >= 0 || fileName.indexOf('\0') >= 0) {
			throw new IOException("Illegal rule file name: " + fileName);
		}
		
		OutputFormat format = OutputFormat.createPrettyPrint(); // pretty-printed XML output
		format.setOmitEncoding(false);
		format.setNewLineAfterDeclaration(false);
		format.setEncoding("UTF-8"); // encoding of the XML document
		format.setIndent(true); // enable indentation
		format.setIndent(" "); // indent with a single space
		format.setNewlines(true); // enable line breaks
		format.setLineSeparator("\r\n");
		format.setXHTML(true);
		
		File ruleDir = new File(getRulePath());
		// Re-check isDirectory() after mkdirs() so a concurrent creation is not reported as failure.
		if (!ruleDir.isDirectory() && !ruleDir.mkdirs() && !ruleDir.isDirectory()) {
			throw new IOException("Unable to create rule directory: " + ruleDir);
		}
		
		File file = new File(ruleDir, fileName);
		FileOutputStream out = new FileOutputStream(file);
		YiXMLWriter writer = null;
		try {
			writer = new YiXMLWriter(out, format);
			writer.setPreserve(true);
			writer.write(document);
		} finally {
			// Always release the file handle, even when writing fails half-way.
			if (writer != null) {
				writer.close();
			} else {
				out.close();
			}
		}
		
		return file;
		
	}

	/**
	 * Appends the XML node for a single rule to the given parent element.
	 * <p>
	 * The node is named after {@code key} and always receives the children
	 * {@code RegexName}, {@code Pattern}, {@code Method}, {@code FilterPattern} and
	 * {@code Options}, in that order. A child only gets text when the matching rule value
	 * is not blank; the options value is trimmed before it is written.
	 *
	 * @param root parent element the rule node is added to
	 * @param key  element name of the rule node
	 * @param rule rule whose values are written
	 */
	private void createDetailRule(Element root, String key, Rule rule) {
		final Element ruleNode = root.addElement(key);
		
		final String rawOptions = rule.getOptions();
		final String[][] fields = {
			{ "RegexName", rule.getRegexName() },
			{ "Pattern", rule.getPattern() },
			{ "Method", rule.getMethod() },
			{ "FilterPattern", rule.getFilterPattern() },
			// A non-blank value stays non-blank after trimming, so trimming up front is safe.
			{ "Options", StringUtils.isNotBlank(rawOptions) ? rawOptions.trim() : null }
		};
		
		for (String[] field : fields) {
			final Element child = ruleNode.addElement(field[0]);
			if (StringUtils.isNotBlank(field[1])) {
				child.setText(field[1]);
			}
		}
	}

	/**
	 * Dry-runs the collecting rules submitted with the current request and reports the outcome.
	 * <p>
	 * The rules are built from the request parameters via {@link #createRuleMap()} and attached to a
	 * fresh {@link CollectParam} / {@link Site} pair. The method then walks through the stages below,
	 * appending {@code <br/>}-separated HTML fragments to a buffer that is finally passed to
	 * {@code renderJson}:
	 * <ol>
	 * <li>site meta data (rule version, site name, charset, url);</li>
	 * <li>optional reverse lookup through the search rules;</li>
	 * <li>novel numbers from the list page, one of which is picked via {@code MathUtils.randomInt};</li>
	 * <li>info page url and source, and the novel attributes extracted from it;</li>
	 * <li>index (menu) page url and source, chapter names and chapter numbers;</li>
	 * <li>one chapter picked via {@code MathUtils.randomInt}: its url, source and content.</li>
	 * </ol>
	 * The {@code stop} flag is raised when a step fails and guards the steps nested after it.
	 */
	public void test() {
		
		StringBuffer result = new StringBuffer();
		
		CollectParam cpm = new CollectParam();
		Site remoteSite = new Site();
		cpm.setRemoteSite(remoteSite);
		
		// Load the collecting rules from the request
		Map<String, Rule> ruleMap =  new HashMap<String, Rule>();
		try {
			ruleMap = createRuleMap();
		} catch (UnsupportedEncodingException e) {
			// On failure the (empty) map is still used below.
			result.append("创建规则错误: " + e.getMessage() + "<br/>");
		}
		cpm.setRuleMap(ruleMap);
		
		result.append("规则版本: " + ParseHelper.getRuleVersion(cpm) + "<br/>");
		
		result.append("目标站名称: " + ParseHelper.getSiteName(cpm) + "<br/>");
		remoteSite.setSiteName(ParseHelper.getSiteName(cpm));
		
		result.append("目标站编码: " + ParseHelper.getSiteCharset(cpm) + "<br/>");
		remoteSite.setCharset(ParseHelper.getSiteCharset(cpm));
		
		result.append("目标站地址: " + ParseHelper.getSiteUrl(cpm) + "<br/>");
		remoteSite.setSiteUrl(ParseHelper.getSiteUrl(cpm));
		
		String msg = "";
		boolean stop = false;
		
		// If all search rules are filled in, look the novel up through the site's search.
		// NOTE(review): the looked-up rules are dereferenced without a null check — presumably the
		// Rule.RegexNamePattern constants match the keys put by createRuleMap(); confirm.
		if(StringUtils.isNotBlank(cpm.getRuleMap().get(Rule.RegexNamePattern.NOVELSEARCH_URL).getPattern())
				&& StringUtils.isNotBlank(cpm.getRuleMap().get(Rule.RegexNamePattern.TESTSEARCH_NOVELNAME).getPattern())
				&& StringUtils.isNotBlank(cpm.getRuleMap().get(Rule.RegexNamePattern.NOVELSEARCH_GETNOVELKEY).getPattern())
				&& StringUtils.isNotBlank(cpm.getRuleMap().get(Rule.RegexNamePattern.NOVELSEARCH_GETNOVELNAME).getPattern())) {
			try {
				Map<String, Object> searchResult = ParseHelper.getSearchContent(cpm);
				if(searchResult == null) {
					result.append("反查目标站小说号失败。<br/>");
				} else {
					String searchNovelNo = ParseHelper.getSearchNovelNo(cpm);
					result.append("目标站小说号: "+searchNovelNo+"<br/>");
				}
			} catch (Exception e) {
				// A failed search is only reported; it does not raise the stop flag.
				result.append("反查目标站小说号失败。<br/>");
			}
		}
		
		// Fetch the novel numbers from the list page; without them nothing below can run.
		List<String> articleNoList = new ArrayList<String>();
		try {
			articleNoList = SpiderHelper.getArticleNo(cpm);
			msg = "";
			if(articleNoList != null && articleNoList.size()>0) {
				msg = articleNoList.toString();
				stop = false;
			} else {
				msg = "未能获取到小说编号， 请检查[目标站编码][列表地址][获得小说编号]是否输入正确， 是否需要忽略大小写。<br/>";
				stop = true;
			}
		} catch (Exception e) {
			msg = "列表页小说编号-错误: " + e.getMessage() + ", 请检查[目标站编码][列表地址][获得小说编号]是否输入正确， 是否需要忽略大小写。<br/>";
			stop = true;
		}
		result.append("列表页小说编号: " + msg + "<br/>");
		
		if(!stop) {
			// Pick one novel number at random.
			// NOTE(review): assumes MathUtils.randomInt treats both bounds as inclusive — confirm.
			String novelNo = articleNoList.get(MathUtils.randomInt(0, articleNoList.size()-1));
			String novelNo2 = "";
			// If the "real novel number" rule is set, derive the real novel number from the randomly picked one
			if(StringUtils.isNotBlank(cpm.getRuleMap().get(Rule.RegexNamePattern.NOVELLIST_GETNOVELKEY2).getPattern())) {
				novelNo2 = ParseHelper.getNovelNo2(cpm, novelNo);
				result.append("真实小说编号: " + novelNo2 + "<br/>");
			}
			String infoURL = "";
			try {
				infoURL = ParseHelper.getInfoRUL(cpm, novelNo);
			} catch (ScriptException e2) {
				// Treated like "no url found" below.
				infoURL = "";
			}
			
			if(StringUtils.isBlank(infoURL)){
				infoURL = "未能获取到信息页地址<br/>";
				stop = true;
			}
			result.append("小说信息页地址: " + infoURL + "<br/>");
			if(!stop) {
				String infoSource = "";
				try {
					infoSource = ParseHelper.getSource(cpm, infoURL);
				} catch (IOException e1) {
					// NOTE(review): msg is assigned here but never appended to result afterwards.
					msg = "未能获取到小说信息页源码<br/>";
					stop = true;
				}
				if(!stop) {
					result.append("小说名: " + ParseHelper.getNovelName(infoSource, cpm) + "<br/>");
					result.append("小说作者: " + ParseHelper.getNovelAuthor(infoSource, cpm) + "<br/>");
					result.append("小说大类: " + ParseHelper.getTopCategory(infoSource, cpm) + "<br/>");
					result.append("小说小类: " + ParseHelper.getSubCategory(infoSource, cpm) + "<br/>");
					result.append("小说简介: " + ParseHelper.getNovelIntro(infoSource, cpm) + "<br/>");
					result.append("小说关键字: " + ParseHelper.getNovelKeywrods(infoSource, cpm) + "<br/>");
					result.append("写作进度: " + ParseHelper.getNovelDegree(infoSource, cpm) + "<br/>");
					result.append("小说封面: " + ParseHelper.getNovelCoverURL(infoSource, cpm) + "<br/>");
					// The extra-info rule is optional and only evaluated when a pattern is present.
					Rule novelInfoExtra = cpm.getRuleMap().get(Rule.RegexNamePattern.NOVEL_INFO_EXTRA);
					if(novelInfoExtra != null && StringUtils.isNotBlank(novelInfoExtra.getPattern())){
						result.append("额外信息: " + ParseHelper.getNovelInfoExtra(infoSource, cpm) + "<br/>");
					}
					
					// URL of the novel's index (menu) page
			        String novelPubKeyURL = "";
					try {
						novelPubKeyURL = ParseHelper.getNovelMenuURL(infoSource, novelNo, cpm);
					} catch (Exception e) {
						// A failure leaves the url empty; the stop flag is not raised here.
						novelPubKeyURL = "";
					}
			        result.append("目录页地址: " + novelPubKeyURL + "<br/>");
			        
			        // Source of the novel's index (menu) page
					String menuSource = "";
					try {
						menuSource = ParseHelper.getChapterListSource(novelPubKeyURL, cpm);
					} catch (Exception e) {
						// NOTE(review): the failure is not reported in result; msg is merely cleared.
						stop = true;
						msg = "";
					}
					if(!stop) {
						List<String> chapterNameList = ParseHelper.getChapterNameList(menuSource, cpm);
						List<String> chapterNoList = ParseHelper.getChapterNoList(menuSource, cpm);
						if(chapterNameList != null && chapterNoList != null) {
							result.append("章节名数量: "+chapterNameList.size()+",章节编号数量：" + chapterNoList.size() + "<br/>" );
							result.append("章节名: "+chapterNameList.toString() + "<br/>");
							
							if(chapterNoList.size() > 0) {
								// Chapter URL (possibly not a complete address), built for a randomly picked chapter
								String chapterURL = "";
								try {
									chapterURL = ParseHelper.getChapterURL(novelPubKeyURL, 
											novelNo, chapterNoList.get(MathUtils.randomInt(0, chapterNoList.size()-1)), cpm);
								} catch (Exception e) {
									result.append("章节地址规则中计算表达式错误<br/>");
									stop = true;
								}
								// Source of the chapter page
								// NOTE(review): this fetch still runs with an empty chapterURL when the step above failed.
								String chapterSource = "";
								try {
									chapterSource = ParseHelper.getChapterSource(chapterURL, cpm);
								} catch (Exception e) {
									result.append("未能获取章节页源码<br/>");
									stop = true;
								}
								if(!stop) {
									// If a rule for the real chapter content page exists, look up the real chapter URL in the chapter page source
									Rule pubContentURL2 = cpm.getRuleMap().get(Rule.RegexNamePattern.PUBCONTENT_URL2);
									if(pubContentURL2 != null && StringUtils.isNotBlank(pubContentURL2.getPattern())) {
										chapterURL = ParseHelper.getPubContentURL2(chapterSource, cpm);
										if(StringUtils.isBlank(chapterURL)) {
											// Reported only; the content is then extracted from the first chapter source.
											result.append("未能获取章节页真实URL<br/>");
										} else {
											result.append("章节页真实URL: "+chapterURL+"<br/>");
											try {
												chapterSource = ParseHelper.getChapterSource(chapterURL, cpm);
											} catch (Exception e) {
												result.append("抓取章节页真实URL内容失败<br/>");
												stop = true;
											}
										}
									}
									if(!stop) {
										// Chapter content
										String chapterContent = ParseHelper.getChapterContent(chapterSource, cpm);
										result.append("章节内容： " + chapterContent);
									}
								}
							}
						}
					}
				}
			}
		}
		
		renderJson(result.toString());
	}

	/**
	 * Builds the rule model from the parameters of the current request.
	 * <p>
	 * Each rule is stored under its own name, which is also the name of the request parameter
	 * carrying its pattern. Depending on the rule, a {@code <name>_FilterPattern} parameter
	 * supplies the filter pattern and a {@code <name>_IgnoreCase} parameter decides whether the
	 * options are {@code "IgnoreCase "} or empty. Rules are inserted in a fixed order.
	 *
	 * @return a new map from rule name to rule, never {@code null}
	 * @throws UnsupportedEncodingException declared by {@link #decodeURL(String)}
	 */
	private Map<String,Rule> createRuleMap() throws UnsupportedEncodingException{
		Map<String,Rule> ruleMap = new HashMap<String, Rule>();
		
		// site meta data
		putPlainRule(ruleMap, "RuleVersion");
		putPlainRule(ruleMap, "GetSiteName");
		putPlainRule(ruleMap, "GetSiteCharset");
		putPlainRule(ruleMap, "GetSiteUrl");
		
		// search
		putPlainRule(ruleMap, "NovelSearchUrl");
		putPlainRule(ruleMap, "TestSearchNovelName");
		putPlainRule(ruleMap, "NovelSearch_GetNovelKey");
		putPlainRule(ruleMap, "NovelSearch_GetNovelName");
		
		// list page
		putPlainRule(ruleMap, "NovelListUrl");
		putCaseRule(ruleMap, "NovelList_GetNovelKey");
		putCaseRule(ruleMap, "NovelList_GetNovelKey2");
		
		// info page
		putPlainRule(ruleMap, "NovelUrl");
		putFilteredRule(ruleMap, "NovelName");
		putCaseRule(ruleMap, "NovelAuthor");
		putCaseRule(ruleMap, "LagerSort");
		putCaseRule(ruleMap, "SmallSort");
		putFilteredRule(ruleMap, "NovelIntro");
		putCaseRule(ruleMap, "NovelKeyword");
		putFilteredRule(ruleMap, "NovelDegree");
		putFilteredRule(ruleMap, "NovelInfoExtra");
		putCaseRule(ruleMap, "NovelCover");
		putPlainRule(ruleMap, "NovelDefaultCoverUrl");
		putCaseRule(ruleMap, "NovelInfo_GetNovelPubKey");
		putPlainRule(ruleMap, "PubIndexUrl");
		
		// chapter list and chapter page
		putFilteredRule(ruleMap, "PubChapterRegion");
		putFilteredRule(ruleMap, "PubChapterName");
		putCaseRule(ruleMap, "PubChapter_GetChapterKey");
		putPlainRule(ruleMap, "PubContentUrl");
		putPlainRule(ruleMap, "PubContentUrl2");
		
		// chapter content
		putFilteredRule(ruleMap, "PubContentText");
		putPlainRule(ruleMap, "PubContentText_ASCII");
		putPlainRule(ruleMap, "PubContentText_FT2JT");
		putPlainRule(ruleMap, "PubContentText_BJ2QJ");
		
		return ruleMap;
	}

	/** Adds a rule that only has a pattern (no filter pattern, no options). */
	private void putPlainRule(Map<String, Rule> ruleMap, String name) throws UnsupportedEncodingException {
		putRule(ruleMap, name, false, false);
	}

	/** Adds a rule with a pattern and an ignore-case option. */
	private void putCaseRule(Map<String, Rule> ruleMap, String name) throws UnsupportedEncodingException {
		putRule(ruleMap, name, false, true);
	}

	/** Adds a rule with a pattern, a filter pattern and an ignore-case option. */
	private void putFilteredRule(Map<String, Rule> ruleMap, String name) throws UnsupportedEncodingException {
		putRule(ruleMap, name, true, true);
	}

	/**
	 * Reads one rule from the request and stores it in the map under its name.
	 *
	 * @param ruleMap        target map
	 * @param name           rule name; also the name of the pattern request parameter
	 * @param withFilter     whether {@code <name>_FilterPattern} is read as the filter pattern
	 * @param withIgnoreCase whether {@code <name>_IgnoreCase} is read to derive the options
	 */
	private void putRule(Map<String, Rule> ruleMap, String name, boolean withFilter, boolean withIgnoreCase)
			throws UnsupportedEncodingException {
		String pattern = decodeURL(getPara(name));
		String filterPattern = withFilter ? decodeURL(getPara(name + "_FilterPattern")) : null;
		String options = null;
		if (withIgnoreCase) {
			// An absent parameter (e.g. an unchecked checkbox) may come back as null;
			// comparing against Boolean.TRUE avoids the NPE that auto-unboxing would cause.
			options = Boolean.TRUE.equals(getParaToBoolean(name + "_IgnoreCase")) ? "IgnoreCase " : "";
		}
		ruleMap.put(name, createRule(name, pattern, filterPattern, null, options));
	}
	
	/**
	 * Creates a {@link Rule} and fills in its basic attributes.
	 *
	 * @param regexName     name of the rule
	 * @param pattern       expression of the rule
	 * @param filterPattern filter expression, may be {@code null}
	 * @param method        method of the rule, may be {@code null}
	 * @param options       options of the rule, may be {@code null}
	 * @return the newly created rule
	 */
	private Rule createRule(String regexName, String pattern,
			String filterPattern, String method, String options) {
		final Rule created = new Rule();
		created.setRegexName(regexName);
		created.setPattern(pattern);
		created.setFilterPattern(filterPattern);
		created.setMethod(method);
		created.setOptions(options);
		return created;
	}
	
	/**
	 * Hook for URL-decoding a request parameter.
	 * <p>
	 * Currently a pass-through: the value is handed back untouched.
	 *
	 * @param param raw parameter value, may be {@code null}
	 * @return the very same value
	 * @throws UnsupportedEncodingException never thrown by the current implementation
	 */
	private String decodeURL(String param) throws UnsupportedEncodingException {
		final String unchanged = param;
		return unchanged;
	}
}
